/******************************************************************************************
* Project: Workhorses of Opportunity — Howard and Weinstein
*
* Inputs:
*   - "tfs_cleaned.dta"
*
* Outputs:
*   - within_10mi_by_year.pdf                          // Coefficient plot: attending university within 10 miles by year
*   - university_distance_bins.tex                     // Table: county-level regressions on university distance bins
*   - university_distance_bins_individual_weighted.tex // Table: individual-weighted regressions on university distance bins
*   - university_distance_bins_yearfe.tex              // Table: individual-weighted regressions with state-by-year fixed effects on distance bins
*   - university_close_to_home.tex                     // Table: proximity to university (county-level)
*   - university_close_to_home_individual_weighted.tex // Table: proximity to university (individual-weighted)
*   - university_close_to_home_yearfe.tex              // Table: proximity to university (individual-weighted with state-by-year FE)
******************************************************************************************/

* Load the cleaned analysis file, replacing any data currently in memory
use "tfs_cleaned", clear

* Outcome variables; the regression loop further down iterates over this list
local vars disthome dist10orless dist11to50 dist51to100 dist101to500 distgt500 ///
	attendnormwithin10 homeinstsamecty attendnormincty

* Drop every column that the regressions and plots below do not reference
keep `vars' home_hasnormalschool homectyfips homestate year studwgt

* Constant equal to one on every observation
generate numobs = 1

* For each outcome, estimate three regressions on home_hasnormalschool,
* always clustering standard errors by home state:
*   1. Individual level, state FE, weights rescaled to sum to one within county -> stored as <outcome>_inv
*   2. Same rescaled weights, state-by-year FE                                    -> stored as <outcome>_yearfe
*   3. County-level means (weighted by studwgt), state FE                         -> stored as <outcome>
local treat   home_hasnormalschool
local nonmiss if home_hasnormalschool != .
local ctywgt  [aw=invweight]

foreach outcome of local vars {
	preserve
		* Work only with observations where this outcome is observed
		drop if `outcome' == .

		* Each student's share of the home county's total student weight
		bysort homectyfips: egen totalweight = total(studwgt)
		generate invweight = 1/totalweight * studwgt

		* (1) individual level, state fixed effects
		reghdfe `outcome' `treat' `nonmiss' `ctywgt', absorb(homestate) vce(cluster homestate)
		estimates store `outcome'_inv

		* (2) individual level, state-by-year fixed effects
		reghdfe `outcome' `treat' `nonmiss' `ctywgt', absorb(homestate#year) vce(cluster homestate)
		estimates store `outcome'_yearfe

		* (3) one row per home county, then an unweighted county-level regression
		collapse (mean) `outcome' `treat' homestate [aw=studwgt], by(homectyfips)
		reghdfe `outcome' `treat' `nonmiss', absorb(homestate) vce(cluster homestate)
		estimates store `outcome'
	restore
}

* Label the regressor of interest
label variable home_hasnormalschool "Grew up in normal school county"

* Coefficient plot: one home_hasnormalschool coefficient per year for the
* outcome dist10orless, with state-by-year fixed effects
preserve
	* Same sample restriction and county-equalizing weights as the main regressions
	drop if dist10orless == .
	bysort homectyfips: egen totalweight = total(studwgt)
	generate invweight = 1/totalweight * studwgt

	reghdfe dist10orless 1.home_hasnormalschool#i.year [aweight=invweight], ///
		absorb(homestate#year) vce(cluster homestate)

	* Dashed grey reference line at zero
	coefplot, xline(0, lcolor(gs8) lpattern(dash)) drop(_cons)
	graph export "within_10mi_by_year.pdf", replace as(pdf)
restore

* LaTeX tables for the five distance-bin outcomes, one file per specification.
* All three share the column titles and the spanning header; they differ in the
* stored estimates used, the output file, and the second table note.

* County-level regressions, state fixed effects
esttab dist10orless dist11to50 dist51to100 dist101to500 distgt500 ///
	using "university_distance_bins.tex", ///
	replace tex label se r2 drop(_cons) ///
	mtitle("$\le$10" "11-50" "51-100" "101-500" "$>$500") ///
	mgroups("Home-University Distance (miles)", ///
		span pattern(1 0 0 0 0) ///
		prefix(\multicolumn{@span}{c}{) suffix(}) ///
		erepeat(\cmidrule(lr){@span})) ///
	nonotes ///
	addnotes("Standard errors clustered by state. * p\$<$0.05, ** p\$<$0.01, *** p\$<$0.001", ///
		"Observations are at the county level. All regressions include state fixed effects.")

* Individual-level regressions with county-equalizing weights, state fixed effects
esttab dist10orless_inv dist11to50_inv dist51to100_inv dist101to500_inv distgt500_inv ///
	using "university_distance_bins_individual_weighted.tex", ///
	replace tex label se r2 drop(_cons) ///
	mtitle("$\le$10" "11-50" "51-100" "101-500" "$>$500") ///
	mgroups("Home-University Distance (miles)", ///
		span pattern(1 0 0 0 0) ///
		prefix(\multicolumn{@span}{c}{) suffix(}) ///
		erepeat(\cmidrule(lr){@span})) ///
	nonotes ///
	addnotes("Standard errors clustered by state. * p\$<$0.05, ** p\$<$0.01, *** p\$<$0.001", ///
		"Observations are at the individual level and weighted so each county has equal total weight. All regressions include state fixed effects.")

* Individual-level regressions with county-equalizing weights, state-by-year fixed effects
esttab dist10orless_yearfe dist11to50_yearfe dist51to100_yearfe dist101to500_yearfe distgt500_yearfe ///
	using "university_distance_bins_yearfe.tex", ///
	replace tex label se r2 drop(_cons) ///
	mtitle("$\le$10" "11-50" "51-100" "101-500" "$>$500") ///
	mgroups("Home-University Distance (miles)", ///
		span pattern(1 0 0 0 0) ///
		prefix(\multicolumn{@span}{c}{) suffix(}) ///
		erepeat(\cmidrule(lr){@span})) ///
	nonotes ///
	addnotes("Standard errors clustered by state. * p\$<$0.05, ** p\$<$0.01, *** p\$<$0.001", ///
		"Observations are at the individual level and weighted so that each county has equal total weight. All regressions include state-year fixed effects.")

* Console versions of the three distance-bin tables (same models, no file written).
* The second note of each table describes that table's own specification:
* the county-level, individual-weighted, and state-by-year-FE variants differ in
* unit of observation and fixed effects, so the notes must differ too.

* County-level regressions, state fixed effects
esttab dist10orless dist11to50 dist51to100 dist101to500 distgt500, se label ///
	mtitle("{&le}10" "11-50" "51-100" "101-500" ">500") ///
	mgroups("Home-University Distance (miles)", pattern(1 0 0 0 0) span) ///
	nonotes addnotes("Standard errors clustered by state. * p<0.05, ** p<0.01, *** p<0.001", ///
	"Observations are at the county level. All regressions include state fixed effects.") ///
	drop(_cons) r2

* Individual-level regressions with county-equalizing weights, state fixed effects
esttab dist10orless_inv dist11to50_inv dist51to100_inv dist101to500_inv distgt500_inv, se label ///
	mtitle("{&le}10" "11-50" "51-100" "101-500" ">500") ///
	mgroups("Home-University Distance (miles)", pattern(1 0 0 0 0) span) ///
	nonotes addnotes("Standard errors clustered by state. * p<0.05, ** p<0.01, *** p<0.001", ///
	"Observations are at the individual level and weighted so each county has equal total weight. All regressions include state fixed effects.") ///
	drop(_cons) r2

* Individual-level regressions with county-equalizing weights, state-by-year fixed effects
esttab dist10orless_yearfe dist11to50_yearfe dist51to100_yearfe dist101to500_yearfe distgt500_yearfe, se label ///
	mtitle("{&le}10" "11-50" "51-100" "101-500" ">500") ///
	mgroups("Home-University Distance (miles)", pattern(1 0 0 0 0) span) ///
	nonotes addnotes("Standard errors clustered by state. * p<0.05, ** p<0.01, *** p<0.001", ///
	"Observations are at the individual level and weighted so each county has equal total weight. All regressions include state-year fixed effects.") ///
	drop(_cons) r2

* LaTeX tables for the four "close to home" outcomes, one file per specification.
* Column titles are identical across the three tables; only the stored
* estimates, the output file, and the second table note change.

* County-level regressions, state fixed effects
esttab dist10orless attendnormwithin10 homeinstsamecty attendnormincty ///
	using "university_close_to_home.tex", ///
	replace tex se label gaps drop(_cons) ///
	mtitle("\shortstack{Attend Univ. \\ within 10mi}" ///
		"\shortstack{Attend Former-Normal\\within 10mi}" ///
		"\shortstack{Attend Univ\\in county}" ///
		"\shortstack{Attend Former-Normal\\in county}") ///
	nonotes ///
	addnotes("Standard errors clustered by state. * p\$<$0.05, ** p\$<$0.01, *** p\$<$0.001", ///
		"Observations are at the county level. All regressions include state fixed effects.")

* Individual-level regressions with county-equalizing weights, state fixed effects
esttab dist10orless_inv attendnormwithin10_inv homeinstsamecty_inv attendnormincty_inv ///
	using "university_close_to_home_individual_weighted.tex", ///
	replace tex se label gaps drop(_cons) ///
	mtitle("\shortstack{Attend Univ. \\ within 10mi}" ///
		"\shortstack{Attend Former-Normal\\within 10mi}" ///
		"\shortstack{Attend Univ\\in county}" ///
		"\shortstack{Attend Former-Normal\\in county}") ///
	nonotes ///
	addnotes("Standard errors clustered by state. * p\$<$0.05, ** p\$<$0.01, *** p\$<$0.001", ///
		"Observations are at the individual level and weighted so each county has equal total weight. All regressions include state fixed effects.")

* Individual-level regressions with county-equalizing weights, state-by-year fixed effects
esttab dist10orless_yearfe attendnormwithin10_yearfe homeinstsamecty_yearfe attendnormincty_yearfe ///
	using "university_close_to_home_yearfe.tex", ///
	replace tex se label gaps drop(_cons) ///
	mtitle("\shortstack{Attend Univ. \\ within 10mi}" ///
		"\shortstack{Attend Former-Normal\\within 10mi}" ///
		"\shortstack{Attend Univ\\in county}" ///
		"\shortstack{Attend Former-Normal\\in county}") ///
	nonotes ///
	addnotes("Standard errors clustered by state. * p\$<$0.05, ** p\$<$0.01, *** p\$<$0.001", ///
		"Observations are at the individual level and weighted so each county has equal total weight. All regressions include state-year fixed effects.")

* Console versions of the three "close to home" tables (same models, no file written).
* Column titles and notes mirror the exported .tex tables: the fourth column is the
* former-normal-school outcome (attendnormincty), and the individual-level tables
* state that observations are weighted so each county has equal total weight.

* County-level regressions, state fixed effects
esttab dist10orless attendnormwithin10 homeinstsamecty attendnormincty, label se drop(_cons) nonotes ///
	addnotes("Standard errors clustered by state. * p<0.05, ** p<0.01, *** p<0.001", ///
	"Observations are at the county level. All regressions include state fixed effects.") ///
	mtitle("Attend Univ within 10mi" "Attend Former-Normal within 10mi" ///
	"Attend Univ in county" "Attend Former-Normal in county")

* Individual-level regressions with county-equalizing weights, state fixed effects
esttab dist10orless_inv attendnormwithin10_inv homeinstsamecty_inv attendnormincty_inv, label se drop(_cons) nonotes ///
	addnotes("Standard errors clustered by state. * p<0.05, ** p<0.01, *** p<0.001", ///
	"Observations are at the individual level and weighted so each county has equal total weight. All regressions include state fixed effects.") ///
	mtitle("Attend Univ within 10mi" "Attend Former-Normal within 10mi" ///
	"Attend Univ in county" "Attend Former-Normal in county")

* Individual-level regressions with county-equalizing weights, state-by-year fixed effects
esttab dist10orless_yearfe attendnormwithin10_yearfe homeinstsamecty_yearfe attendnormincty_yearfe, label se drop(_cons) nonotes ///
	addnotes("Standard errors clustered by state. * p<0.05, ** p<0.01, *** p<0.001", ///
	"Observations are at the individual level and weighted so each county has equal total weight. All regressions include state-year fixed effects.") ///
	mtitle("Attend Univ within 10mi" "Attend Former-Normal within 10mi" ///
	"Attend Univ in county" "Attend Former-Normal in county")
